#' ---
#' title: "Regression and Other Stories: Unemployment"
#' author: "Andrew Gelman, Jennifer Hill, Aki Vehtari"
#' date: "`r format(Sys.Date())`"
#' output:
#'   html_document:
#'     theme: readable
#'     toc: true
#'     toc_depth: 2
#'     toc_float: true
#'     code_download: true
#' ---

#' Time series fit and posterior predictive model checking for
#' unemployment series. See Chapter 11 in Regression and Other
#' Stories.
#' 
#' -------------
#' 

#+ setup, include=FALSE
knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA)
# switch this to TRUE to save figures in separate files
savefigs <- FALSE

#' #### Load packages
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstanarm")
library("ggplot2")
library("bayesplot")
theme_set(bayesplot::theme_default(base_family = "sans"))
#+ eval=FALSE, include=FALSE
# grayscale figures for the book
if (savefigs) color_scheme_set(scheme = "gray")

#' #### Load data
unemp <- read.table(root("Unemployment/data","unemp.txt"), header=TRUE)
head(unemp)

#' #### Plot the unemployment rate
#+ eval=FALSE, include=FALSE
if (savefigs) pdf(root("Unemployment/figs","unemployment1.pdf"), height=3, width=4.5)
#+
par(mar=c(3,3,1,.1), mgp=c(1.7,.5,0), tck=-.01)
plot(unemp$year, unemp$y, type="l", ylab="Unemployment rate", xlab="Year", yaxs="i",
  ylim=c(0, max(unemp$y)*1.05), xaxt="n", yaxt="n", bty="l")
axis(1, seq(1950,2010,10), rep("",7))
axis(1, seq(1950,2010,20))
axis(2, seq(0,10), rep("",11))
axis(2, c(0,5,10), paste (c(0,5,10), "%", sep=""))
#+ eval=FALSE, include=FALSE
if (savefigs) dev.off()

#' #### Fit a 1st-order autogregression
n <- nrow(unemp)
unemp$y_lag <- c(NA, unemp$y[1:(n-1)])
fit_lag <- stan_glm(y ~ y_lag, data=unemp, refresh=0)
print(fit_lag, digits=2)

#' #### Simulate replicated datasets using posterior predict
y_rep <- posterior_predict(fit_lag)
y_rep <- cbind(unemp$y[1], y_rep)
n_sims <- nrow(y_rep)

#' #### Simulate replicated datasets "manually"
sims <- as.matrix(fit_lag)
n_sims <- nrow(sims)
y_rep <- array(NA, c(n_sims, n))
for (s in 1:n_sims){
  y_rep[s,1] <- unemp$y[1]
  for (t in 2:n){
    y_rep[s,t] <- sims[s,"(Intercept)"] + sims[s,"y_lag"] * y_rep[s,t-1] + rnorm(1, 0, sims[s,"sigma"])
  }
}

#' #### Plot the simulated unemployment rate series
#+ eval=FALSE, include=FALSE
if (savefigs) pdf(root("Unemployment/figs","unemployment2.pdf"), height=4.5, width=7.5)
#+
par(mar=c(1,1,3,.1), mgp=c(2,.5,0), tck=-.01)
par(mfrow=c(3,5))
for (s in sort(sample(n_sims, 15))){
  plot (unemp$year, y_rep[s,], type="l", ylab="", xlab="", yaxs="i",
  ylim=c(0, max(unemp$y)*1.05), xaxt="n", yaxt="n", bty="l", main=paste("Simulation", s))
  axis(1, seq(1950,2010,10), rep("",7))
  axis(2, seq(0,10), rep("",11))
}
#+ eval=FALSE, include=FALSE
if (savefigs) dev.off()

#' #### Numerical posterior predictive check
test <- function (y){
  n <- length(y)
  y_lag <- c(NA, y[1:(n-1)])
  y_lag_2 <- c(NA, NA, y[1:(n-2)])
  return(sum(sign(y-y_lag) != sign(y_lag-y_lag_2), na.rm=TRUE))
}
test_y <- test(unemp$y)
test_rep <- apply(y_rep, 1, test)
print(mean(test_rep > test_y))
print(quantile(test_rep, c(.1,.5,.9)))
#' #### Plot test statistic for data and histogram of test statistics for replications
ppc_stat(y=unemp$y, yrep=y_rep, stat=test, binwidth = 1) + scale_y_continuous(breaks=NULL)
